import numpy as np
import sys
print(sys.executable)/home/seamus/Repos/VT0007-deforestation-map/bin/python
The following starter script documents two workflows for deriving deforestation risk maps in accordance with Verra’s new methodology for unplanned deforestation allocation in jurisdictional nested REDD+ projects using the VT0007 toolset.
REDD+, VCS, Verra, Carbon verification, Jurisdictional
The following details two workflow approaches to Verra’s recommended sequence of deforestation risk map development Verra (2021). For comparison purposes, both workflows use the same training sample (Stanimirova et al. 2023) and STAC-formatted Landsat Collection-2 Level-2 imagery.
Workflow-1 is coded in R and is recommended for smaller areas of analysis, as it offers additional functions for model tuning and classifer evaluation. Workflow-2, which is coded in Python and Google Earth Engine functions, is recommended for larger areas of interest (Update java link here).

sits/home/seamus/Repos/VT0007-deforestation-map/bin/python
# assign master crs
crs_master = sf::st_crs("epsg:4326")
aoi_country = geodata::gadm(country="GUY", level=0, path=tempdir()) |>
sf::st_as_sf() |> sf::st_cast() |> sf::st_transform(crs_master)
aoi_states = geodata::gadm(country="GUY", level=1, path=tempdir()) |>
sf::st_as_sf() |> sf::st_cast() |> sf::st_transform(crs_master) |>
dplyr::rename(State = NAME_1)
aoi_target = dplyr::filter(aoi_states, State == "Barima-Waini")
tmap::tmap_mode("view")
tmap::tm_shape(aoi_states) + tmap::tm_borders(col = "white", lwd = 0.5) +
tmap::tm_text("State", col = "white", size = 1, alpha = 0.3, just = "bottom") +
tmap::tm_shape(aoi_country) + tmap::tm_borders(col = "white", lwd = 1) +
tmap::tm_shape(aoi_target) + tmap::tm_borders(col = "red", lwd = 2) +
tmap::tm_text("State", col = "red", size = 1.3) +
tmap::tm_basemap("Esri.WorldImagery")We assemble and process a data cube representing a historical reference period (HRP) between 2013-01-01 and 2023-01-01 for the country of Suriname. Using the sits_regularize functions, we apply a cloud masking and pixel back-filling based on cloudless ranking and median-normalization across 5-year intervals to derive three dry-season mosaics for 2013, 2018 and 2023.
# 2014 -------------------
# cloud-assemble data cube
cube_raw_2014 = sits::sits_cube(
source = "MPC",
collection = "LANDSAT-C2-L2",
bands = c("RED", "GREEN", "BLUE", "NIR08", "SWIR16", "CLOUD"),
roi = aoi_target,
start_date = as.Date("2014-01-01"),
end_date = as.Date("2014-07-01"),
progress = T
)
# regularize data cube
cube_reg_2014 = sits::sits_regularize(
cube = cube_raw_2014,
roi = aoi_target,
res = 60,
period = "P180D",
output_dir = here::here("cubes", "reg", "2014"),
memsize = 16,
multicores = 8,
progress = T
)
# 2019 -------------------
# cloud-assemble data cube
cube_raw_2019 = sits::sits_cube(
source = "MPC",
collection = "LANDSAT-C2-L2",
bands = c("RED", "GREEN", "BLUE", "NIR08", "SWIR16", "CLOUD"),
roi = aoi_target,
start_date = as.Date("2019-01-01"),
end_date = as.Date("2019-07-01"),
progress = T
)
# regularize data cube
cube_reg_2019 = sits::sits_regularize(
cube = cube_raw_2019,
roi = aoi_target,
res = 60,
period = "P180D",
output_dir = here::here("cubes", "reg", "2019"),
memsize = 16,
multicores = 8,
progress = T
)
# 2024 -------------------
# cloud-assemble data cube
cube_raw_2024 = sits::sits_cube(
source = "MPC",
collection = "LANDSAT-C2-L2",
bands = c("RED", "GREEN", "BLUE", "NIR08", "SWIR16", "CLOUD"),
roi = aoi_target,
start_date = as.Date("2024-01-01"),
end_date = as.Date("2024-07-01"),
progress = T
)
# regularize data cube
cube_reg_2024 = sits::sits_regularize(
cube = cube_raw_2024,
roi = aoi_target,
res = 60,
period = "P180D",
output_dir = here::here("cubes", "reg", "2024"),
memsize = 16,
multicores = 8,
progress = T
)# plot cube timelines
sits_timeline(cube_reg_2014)
sits_timeline(cube_reg_2019)
sits_timeline(cube_reg_2024)
plot(cube_reg_2014,
red = "RED",
green = "GREEN",
blue = "BLUE",
date = "2014-01-03"
)
plot(cube_reg_2019,
red = "RED",
green = "GREEN",
blue = "BLUE",
date = "2019-01-08"
)
plot(cube_reg_2024,
red = "RED",
green = "GREEN",
blue = "BLUE",
date = "2024-01-07"
)We import the GLanCE training dataset of annual times series points that includes 7 land cover classes (Figure 2; (Woodcock et al., n.d.)). Training samples are fitted to a Random Forest model and post-processed with a Bayesian smoothing and then evaluated using confusion matrix. The classifier is then calibrated by mapping pixel uncertainty, adding new samples in areas of high uncertainty, reclassifying with improved samples and re-evaluated using confusion matrix.

# Extract training set from gee to drive & import: https://gee-community-catalog.org/projects/glance_training/?h=training
glance_training_url = "https://drive.google.com/file/d/1CgBP2J2OdOhmOiVS4hGibLEMyVLTe1_P/view?usp=drive_link"
# file_name = "glance_training.csv"
# download.file(url = url, path = here::here("training"), destfile = file_name)
glance_training = read.csv(here::here("training", "glance_training.csv"))
data("samples_prodes_4classes")
# Select the same three bands used in the data cube
samples_4classes_3bands <- sits_select(
data = samples_prodes_4classes,
bands = c("B02", "B8A", "B11")
)
# Train a random forest model
rfor_model <- sits_train(
samples = samples_4classes_3bands,
ml_method = sits_rfor()
)
# Classify the small area cube
s2_cube_probs <- sits_classify(
data = s2_reg_cube_ro,
ml_model = rfor_model,
output_dir = "./cubes/02_class/",
memsize = 15,
multicores = 5
)
# Post-process the probability cube
s2_cube_bayes <- sits_smooth(
cube = s2_cube_probs,
output_dir = "./cubes/02_class/",
memsize = 16,
multicores = 4
)
# Label the post-processed probability cube
s2_cube_label <- sits_label_classification(
cube = s2_cube_bayes,
output_dir = "./cubes/02_class/",
memsize = 16,
multicores = 4
)
plot(s2_cube_label)To improve model performance, we estimate class uncertainty and plot these pixel error metrics. Results below reveal highest uncertainty levels in classification of wetland and water areas.
As expected, the places of highest uncertainty are those covered by surface water or associated with wetlands. These places are likely to be misclassified. For this reason, sits provides sits_uncertainty_sampling(), which takes the uncertainty cube as its input and produces a tibble with locations in WGS84 with high uncertainty (Camara et al., n.d.).
We can then use these points of high-uncertainty as new samples to add to our current training dataset. Once we identify their feature classes and relabel them correctly, we append them to derive an augmented samples_round_2.
# Label the new samples
new_samples$label <- "Wetland"
# Obtain the time series from the regularized cube
new_samples_ts <- sits_get_data(
cube = s2_reg_cube_ro,
samples = new_samples
)
# Add new class to original samples
samples_round_2 <- dplyr::bind_rows(
samples_4classes_3bands,
new_samples_ts
)
# Train a RF model with the new sample set
rfor_model_v2 <- sits_train(
samples = samples_round_2,
ml_method = sits_rfor()
)
# Classify the small area cube
s2_cube_probs_v2 <- sits_classify(
data = s2_reg_cube_ro,
ml_model = rfor_model_v2,
output_dir = "./cubes/02_class/",
version = "v2",
memsize = 16,
multicores = 4
)
# Post-process the probability cube
s2_cube_bayes_v2 <- sits_smooth(
cube = s2_cube_probs_v2,
output_dir = "./cubes/04_smooth/",
version = "v2",
memsize = 16,
multicores = 4
)
# Label the post-processed probability cube
s2_cube_label_v2 <- sits_label_classification(
cube = s2_cube_bayes_v2,
output_dir = "./cubes/05_tuned/",
version = "v2",
memsize = 16,
multicores = 4
)
# Plot the second version of the classified cube
plot(s2_cube_label_v2)To select a validation subset of the map, sits recommends Cochran’s method for stratified random sampling (Cochran 1977). The method divides the population into homogeneous subgroups, or strata, and then applying random sampling within each stratum. Alternatively, ad-hoc parameterization is suggested as follows.
ro_sampling_design <- sits_sampling_design(
cube = s2_cube_label_v2,
expected_ua = c(
"Burned_Area" = 0.75,
"Cleared_Area" = 0.70,
"Forest" = 0.75,
"Highly_Degraded" = 0.70,
"Wetland" = 0.70
),
alloc_options = c(120, 100),
std_err = 0.01,
rare_class_prop = 0.1
)
# show sampling desing
ro_sampling_designro_samples_sf <- sits_stratified_sampling(
cube = s2_cube_label_v2,
sampling_design = ro_sampling_design,
alloc = "alloc_120",
multicores = 4,
shp_file = "./samples/ro_samples.shp"
)
sf::st_write(ro_samples_sf,
"./samples/ro_samples.csv",
layer_options = "GEOMETRY=AS_XY",
append = FALSE # TRUE if editing existing sample
)GEE